Clean data

Load data

dat <- readxl::read_xlsx("/Volumes/manhnd/Yi_roe_data.xlsx")
# filter variables for analyis
dat <- dat %>% dplyr::select("serialno", "country", "papm", "gender", "age", "education", "marital", "occupation", "live_area", "mobile", "internet")

Transform data

# Remove the gender as others (18/2912)
dat1 <- dat %>%
  filter(gender != 3 & gender != 4) %>%
  mutate(marital = dplyr::recode(marital, `4` = 3, `5` = 3))

# Transform the valuables
dat1 <- dat1 %>%
  mutate(
    papm = factor(papm, ordered = TRUE),
    gender = factor(gender, labels = c("Male", "Female")),
    marital = factor(marital, labels = c("Never married", "Married/living with a partner", "Separated/widowed/divorced")),
    education = factor(education, labels = c("No formal education", "Primary school", "Secondary school", "Diploma", "Degree")),
    occupation = factor(occupation, labels = c("Employed", "Unemployed", "Student", "Retiree", "Homemaker")),
    live_area = factor(live_area, labels = c("Urban", "Peri-urban", "Rural", "Slums")),
    mobile = factor(mobile, labels = c("No", "Yes feature phone", "Yes smart phone")),
    internet = factor(internet, labels = c("No", "Yes")),
    country = factor(country)
  )%>%
  mutate(education = fct_relevel(education,"Degree", "Diploma", "Secondary school", "Primary school", "No formal education"), 
         mobile = fct_relevel(mobile, "Yes smart phone", "Yes feature phone", "No"))


summary(dat1)
##     serialno             country    papm        gender          age       
##  Min.   :   1.0   Philippines:459   1:1596   Male  :1365   Min.   :18.00  
##  1st Qu.: 726.2   Uganda     :395   2: 291   Female:1529   1st Qu.:22.00  
##  Median :1453.5   Indonesia  :368   3: 285                 Median :30.00  
##  Mean   :1455.6   Zimbabwe   :292   4: 179                 Mean   :34.95  
##  3rd Qu.:2185.8   Cameroon   :289   5: 543                 3rd Qu.:44.00  
##  Max.   :2912.0   Bangladesh :280                          Max.   :90.00  
##                   (Other)    :811                                         
##                education                             marital    
##  Degree             : 658   Never married                :1221  
##  Diploma            : 405   Married/living with a partner:1395  
##  Secondary school   :1182   Separated/widowed/divorced   : 278  
##  Primary school     : 476                                       
##  No formal education: 173                                       
##                                                                 
##                                                                 
##       occupation        live_area                  mobile     internet  
##  Employed  :1294   Urban     :1066   Yes smart phone  :2011   No : 839  
##  Unemployed: 566   Peri-urban: 374   Yes feature phone: 626   Yes:2055  
##  Student   : 681   Rural     :1281   No               : 257             
##  Retiree   :  65   Slums     : 173                                      
##  Homemaker : 288                                                        
##                                                                         
## 
str(dat1)
## tibble [2,894 × 11] (S3: tbl_df/tbl/data.frame)
##  $ serialno  : num [1:2894] 1 2 3 4 5 6 7 8 9 10 ...
##  $ country   : Factor w/ 9 levels "Bangladesh","Cameroon",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ papm      : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender    : Factor w/ 2 levels "Male","Female": 2 2 2 2 2 1 1 2 1 2 ...
##  $ age       : num [1:2894] 29 36 52 27 31 31 70 50 49 45 ...
##  $ education : Factor w/ 5 levels "Degree","Diploma",..: 4 5 5 4 4 4 5 5 5 5 ...
##  $ marital   : Factor w/ 3 levels "Never married",..: 2 2 3 2 2 2 2 2 2 2 ...
##  $ occupation: Factor w/ 5 levels "Employed","Unemployed",..: 5 5 1 5 5 1 2 5 1 5 ...
##  $ live_area : Factor w/ 4 levels "Urban","Peri-urban",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ mobile    : Factor w/ 3 levels "Yes smart phone",..: 2 2 2 2 2 2 2 3 2 2 ...
##  $ internet  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

Visualize some variables

papm versus countries

dat2 <- dat1 %>%
  select("country", "papm") %>%
  group_by(country, papm) %>%
  summarise(n = n()) %>%
  mutate(proportion_papm = n / sum(n)) %>%
  ungroup()
## `summarise()` has grouped output by 'country'. You can override using the
## `.groups` argument.
ggplot(data = dat2, aes(x = country, y = proportion_papm)) +
  geom_col(width = 0.5) +
  facet_grid(~papm) +
  coord_flip()

Education versus countries

dat2 <- dat %>%
  select("country", "education") %>%
  group_by(country, education) %>%
  summarise(n = n()) %>%
  mutate(proportion_education = n / sum(n)) %>%
  ungroup()
## `summarise()` has grouped output by 'country'. You can override using the
## `.groups` argument.
ggplot(data = dat2, aes(x = country, y = proportion_education)) +
  geom_col(width = 0.5) +
  facet_grid(~education) +
  coord_flip()

MASS package: polr modell

Fit the modell without country interaction

m1 <- polr(formula = papm ~ country + gender + age + education + marital + occupation + live_area + mobile + internet, data = dat1, Hess = TRUE)

summary(m1)
## Call:
## polr(formula = papm ~ country + gender + age + education + marital + 
##     occupation + live_area + mobile + internet, data = dat1, 
##     Hess = TRUE)
## 
## Coefficients:
##                                          Value Std. Error  t value
## countryCameroon                       1.030759   0.304410  3.38608
## countryIndia                          3.224449   0.272147 11.84819
## countryIndonesia                      1.723011   0.279531  6.16393
## countryKenya                          2.155603   0.310188  6.94934
## countryNepal                          3.933488   0.282815 13.90836
## countryPhilippines                    1.061483   0.286370  3.70668
## countryUganda                         4.131838   0.272307 15.17348
## countryZimbabwe                       4.363332   0.287154 15.19507
## genderFemale                         -0.005774   0.085817 -0.06728
## age                                  -0.001336   0.003866 -0.34556
## educationDiploma                     -0.121080   0.142585 -0.84918
## educationSecondary school            -0.346340   0.116812 -2.96494
## educationPrimary school              -0.724127   0.157936 -4.58493
## educationNo formal education         -1.204538   0.220265 -5.46858
## maritalMarried/living with a partner  0.243056   0.122772  1.97974
## maritalSeparated/widowed/divorced     0.602569   0.181871  3.31316
## occupationUnemployed                 -0.183366   0.118656 -1.54535
## occupationStudent                    -0.156349   0.134595 -1.16163
## occupationRetiree                    -0.556414   0.284432 -1.95623
## occupationHomemaker                  -0.404099   0.151572 -2.66605
## live_areaPeri-urban                  -0.019125   0.135460 -0.14118
## live_areaRural                        0.056864   0.116823  0.48675
## live_areaSlums                        0.341687   0.225495  1.51527
## mobileYes feature phone               0.359186   0.161274  2.22718
## mobileNo                             -0.217412   0.212906 -1.02116
## internetYes                           0.491148   0.168225  2.91959
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2  2.8558  0.3410     8.3741
## 2|3  3.4972  0.3431    10.1942
## 3|4  4.1509  0.3446    12.0459
## 4|5  4.6134  0.3457    13.3433
## 
## Residual Deviance: 6159.317 
## AIC: 6219.317

Fit the model with country interaction

# First fit the model with interaction between country and all of the independent variables but the model cannot run. Then I removed the interaction between live_area and country, model can run as below
m2 <- polr(formula = papm ~ country + gender + age + education + marital + occupation + live_area + mobile + internet + gender:country + age:country + marital:country + occupation:country + mobile:country + internet:country + education:country, data = dat1, Hess = TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m2)
## Call:
## polr(formula = papm ~ country + gender + age + education + marital + 
##     occupation + live_area + mobile + internet + gender:country + 
##     age:country + marital:country + occupation:country + mobile:country + 
##     internet:country + education:country, data = dat1, Hess = TRUE)
## 
## Coefficients:
##                                                              Value Std. Error
## countryCameroon                                           2.492508  1.458e+00
## countryIndia                                              3.690055  1.163e+00
## countryIndonesia                                        -16.791134  7.346e-01
## countryKenya                                              2.844510  1.205e+00
## countryNepal                                              4.393497  1.633e+00
## countryPhilippines                                       -1.031332  1.258e+00
## countryUganda                                             4.564882  1.344e+00
## countryZimbabwe                                           6.316144  1.123e+00
## genderFemale                                              0.353911  5.555e-01
## age                                                       0.077287  3.691e-02
## educationDiploma                                         -0.314910  6.961e-01
## educationSecondary school                                -1.677119  8.788e-01
## educationPrimary school                                 -15.839338  2.465e-01
## educationNo formal education                            -11.798116  3.590e-01
## maritalMarried/living with a partner                     -0.838797  1.171e+00
## maritalSeparated/widowed/divorced                        -2.453085  2.089e-01
## occupationUnemployed                                    -16.543165  1.461e-01
## occupationStudent                                         0.076169  1.273e+00
## occupationRetiree                                       -18.168921  3.829e-01
## occupationHomemaker                                     -12.483968  2.274e-01
## live_areaPeri-urban                                      -0.090746  1.443e-01
## live_areaRural                                           -0.001863  1.294e-01
## live_areaSlums                                            0.248736  2.547e-01
## mobileYes feature phone                                   0.076845  8.664e-01
## mobileNo                                                -11.975362  3.784e-01
## internetYes                                               0.064900  6.905e-01
## countryCameroon:genderFemale                             -0.473963  6.682e-01
## countryIndia:genderFemale                                 0.075206  6.091e-01
## countryIndonesia:genderFemale                            -0.611062  6.185e-01
## countryKenya:genderFemale                                -0.178951  6.506e-01
## countryNepal:genderFemale                                -0.674422  6.128e-01
## countryPhilippines:genderFemale                          -0.090383  6.377e-01
## countryUganda:genderFemale                               -0.476893  5.835e-01
## countryZimbabwe:genderFemale                             -0.918863  6.356e-01
## countryCameroon:age                                      -0.074917  4.380e-02
## countryIndia:age                                         -0.063573  3.861e-02
## countryIndonesia:age                                     -0.033716  4.141e-02
## countryKenya:age                                         -0.031333  4.090e-02
## countryNepal:age                                         -0.064795  3.900e-02
## countryPhilippines:age                                   -0.081053  4.004e-02
## countryUganda:age                                        -0.073390  3.747e-02
## countryZimbabwe:age                                      -0.122210  4.074e-02
## countryCameroon:maritalMarried/living with a partner      1.401055  1.260e+00
## countryIndia:maritalMarried/living with a partner         0.957308  1.223e+00
## countryIndonesia:maritalMarried/living with a partner     0.699653  1.219e+00
## countryKenya:maritalMarried/living with a partner         0.764618  1.252e+00
## countryNepal:maritalMarried/living with a partner         0.361383  1.231e+00
## countryPhilippines:maritalMarried/living with a partner   1.803508  1.269e+00
## countryUganda:maritalMarried/living with a partner        0.900287  1.207e+00
## countryZimbabwe:maritalMarried/living with a partner      2.361908  1.246e+00
## countryCameroon:maritalSeparated/widowed/divorced       -12.787101  9.813e-08
## countryIndia:maritalSeparated/widowed/divorced            2.629842  6.726e-01
## countryIndonesia:maritalSeparated/widowed/divorced        0.976450  6.735e-01
## countryKenya:maritalSeparated/widowed/divorced            2.708058  5.056e-01
## countryNepal:maritalSeparated/widowed/divorced            3.122573  6.261e-01
## countryPhilippines:maritalSeparated/widowed/divorced      3.827174  6.369e-01
## countryUganda:maritalSeparated/widowed/divorced           2.576476  4.045e-01
## countryZimbabwe:maritalSeparated/widowed/divorced         4.439621  5.209e-01
## countryCameroon:occupationUnemployed                     16.766762  4.480e-01
## countryIndia:occupationUnemployed                        16.585153  4.851e-01
## countryIndonesia:occupationUnemployed                    15.560142  4.244e-01
## countryKenya:occupationUnemployed                        16.774072  4.631e-01
## countryNepal:occupationUnemployed                        14.735978  5.289e-01
## countryPhilippines:occupationUnemployed                  16.481371  4.856e-01
## countryUganda:occupationUnemployed                       16.816729  2.401e-01
## countryZimbabwe:occupationUnemployed                     15.691273  3.303e-01
## countryCameroon:occupationStudent                        -0.190208  1.358e+00
## countryIndia:occupationStudent                            0.231223  1.323e+00
## countryIndonesia:occupationStudent                       -0.425000  1.326e+00
## countryKenya:occupationStudent                           -0.404940  1.408e+00
## countryNepal:occupationStudent                           -0.490739  1.325e+00
## countryPhilippines:occupationStudent                      0.680989  1.377e+00
## countryUganda:occupationStudent                          -0.016319  1.325e+00
## countryZimbabwe:occupationStudent                         0.024487  1.359e+00
## countryCameroon:occupationRetiree                         0.304758  1.187e-08
## countryIndia:occupationRetiree                           17.767074  6.078e-01
## countryIndonesia:occupationRetiree                        1.689431  1.275e-07
## countryKenya:occupationRetiree                           18.049822  1.465e+00
## countryNepal:occupationRetiree                           16.822030  5.936e-01
## countryPhilippines:occupationRetiree                     18.010164  8.730e-01
## countryUganda:occupationRetiree                          17.474985  7.974e-01
## countryZimbabwe:occupationRetiree                        36.326894  4.835e-08
## countryCameroon:occupationHomemaker                      11.749791  9.991e-01
## countryIndia:occupationHomemaker                         11.743742  3.820e-01
## countryIndonesia:occupationHomemaker                     11.075992  6.714e-01
## countryKenya:occupationHomemaker                         13.991748  7.157e-01
## countryNepal:occupationHomemaker                         11.785045  4.632e-01
## countryPhilippines:occupationHomemaker                   12.832384  6.032e-01
## countryUganda:occupationHomemaker                        12.899519  3.283e-01
## countryZimbabwe:occupationHomemaker                      10.611522  9.371e-01
## countryCameroon:mobileYes feature phone                  -0.889104  1.172e+00
## countryIndia:mobileYes feature phone                     -0.096117  1.097e+00
## countryIndonesia:mobileYes feature phone                 -1.052960  1.095e+00
## countryKenya:mobileYes feature phone                     -0.417383  9.854e-01
## countryNepal:mobileYes feature phone                      0.318501  1.698e+00
## countryPhilippines:mobileYes feature phone                1.295778  9.753e-01
## countryUganda:mobileYes feature phone                    -0.064905  9.691e-01
## countryZimbabwe:mobileYes feature phone                   2.043451  9.821e-01
## countryCameroon:mobileNo                                 12.033302  1.150e+00
## countryIndia:mobileNo                                    11.389790  8.490e-01
## countryIndonesia:mobileNo                                13.842867  1.918e+00
## countryKenya:mobileNo                                    10.747569  7.419e-01
## countryNepal:mobileNo                                    12.822932  1.573e+00
## countryPhilippines:mobileNo                              13.354683  7.514e-01
## countryUganda:mobileNo                                   11.408845  5.967e-01
## countryZimbabwe:mobileNo                                 10.559411  7.470e-01
## countryCameroon:internetYes                              -0.105901  1.118e+00
## countryIndia:internetYes                                  0.376126  9.805e-01
## countryIndonesia:internetYes                             19.253899  7.346e-01
## countryKenya:internetYes                                 -0.265561  7.991e-01
## countryNepal:internetYes                                  1.159854  1.552e+00
## countryPhilippines:internetYes                            2.063132  9.047e-01
## countryUganda:internetYes                                 0.623627  8.160e-01
## countryZimbabwe:internetYes                              -0.672447  7.812e-01
## countryCameroon:educationDiploma                          0.419893  8.526e-01
## countryIndia:educationDiploma                             0.063448  8.080e-01
## countryIndonesia:educationDiploma                         0.531384  8.129e-01
## countryKenya:educationDiploma                            -1.068472  8.678e-01
## countryNepal:educationDiploma                             0.610159  7.713e-01
## countryPhilippines:educationDiploma                       0.455298  8.284e-01
## countryUganda:educationDiploma                            0.449815  1.250e+00
## countryZimbabwe:educationDiploma                          0.753858  8.291e-01
## countryCameroon:educationSecondary school                 1.146578  9.823e-01
## countryIndia:educationSecondary school                    1.019427  9.235e-01
## countryIndonesia:educationSecondary school                1.111286  9.255e-01
## countryKenya:educationSecondary school                    0.707102  1.029e+00
## countryNepal:educationSecondary school                    1.140659  9.351e-01
## countryPhilippines:educationSecondary school              1.961212  9.644e-01
## countryUganda:educationSecondary school                   1.598778  1.321e+00
## countryZimbabwe:educationSecondary school                 2.537404  9.471e-01
## countryCameroon:educationPrimary school                  13.480297  7.845e-01
## countryIndia:educationPrimary school                     14.918393  4.397e-01
## countryIndonesia:educationPrimary school                 16.274300  8.641e-01
## countryKenya:educationPrimary school                     13.616153  6.510e-01
## countryNepal:educationPrimary school                     14.620560  4.560e-01
## countryPhilippines:educationPrimary school               15.580291  6.774e-01
## countryUganda:educationPrimary school                    15.806130  9.107e-01
## countryZimbabwe:educationPrimary school                  17.459356  9.028e-01
## countryCameroon:educationNo formal education              0.500793  9.243e-05
## countryIndia:educationNo formal education                10.194480  6.177e-01
## countryIndonesia:educationNo formal education            -0.019179  6.323e-06
## countryKenya:educationNo formal education                13.469849  1.186e+00
## countryNepal:educationNo formal education                10.738332  6.282e-01
## countryPhilippines:educationNo formal education          11.951368  8.581e-01
## countryUganda:educationNo formal education               11.263043  9.077e-01
## countryZimbabwe:educationNo formal education             -0.508953  4.836e-06
##                                                            t value
## countryCameroon                                          1.709e+00
## countryIndia                                             3.174e+00
## countryIndonesia                                        -2.286e+01
## countryKenya                                             2.360e+00
## countryNepal                                             2.690e+00
## countryPhilippines                                      -8.197e-01
## countryUganda                                            3.396e+00
## countryZimbabwe                                          5.625e+00
## genderFemale                                             6.371e-01
## age                                                      2.094e+00
## educationDiploma                                        -4.524e-01
## educationSecondary school                               -1.908e+00
## educationPrimary school                                 -6.426e+01
## educationNo formal education                            -3.286e+01
## maritalMarried/living with a partner                    -7.165e-01
## maritalSeparated/widowed/divorced                       -1.174e+01
## occupationUnemployed                                    -1.133e+02
## occupationStudent                                        5.983e-02
## occupationRetiree                                       -4.745e+01
## occupationHomemaker                                     -5.489e+01
## live_areaPeri-urban                                     -6.291e-01
## live_areaRural                                          -1.440e-02
## live_areaSlums                                           9.765e-01
## mobileYes feature phone                                  8.869e-02
## mobileNo                                                -3.164e+01
## internetYes                                              9.398e-02
## countryCameroon:genderFemale                            -7.094e-01
## countryIndia:genderFemale                                1.235e-01
## countryIndonesia:genderFemale                           -9.880e-01
## countryKenya:genderFemale                               -2.751e-01
## countryNepal:genderFemale                               -1.101e+00
## countryPhilippines:genderFemale                         -1.417e-01
## countryUganda:genderFemale                              -8.172e-01
## countryZimbabwe:genderFemale                            -1.446e+00
## countryCameroon:age                                     -1.710e+00
## countryIndia:age                                        -1.646e+00
## countryIndonesia:age                                    -8.143e-01
## countryKenya:age                                        -7.661e-01
## countryNepal:age                                        -1.661e+00
## countryPhilippines:age                                  -2.024e+00
## countryUganda:age                                       -1.959e+00
## countryZimbabwe:age                                     -3.000e+00
## countryCameroon:maritalMarried/living with a partner     1.112e+00
## countryIndia:maritalMarried/living with a partner        7.828e-01
## countryIndonesia:maritalMarried/living with a partner    5.738e-01
## countryKenya:maritalMarried/living with a partner        6.106e-01
## countryNepal:maritalMarried/living with a partner        2.935e-01
## countryPhilippines:maritalMarried/living with a partner  1.421e+00
## countryUganda:maritalMarried/living with a partner       7.459e-01
## countryZimbabwe:maritalMarried/living with a partner     1.895e+00
## countryCameroon:maritalSeparated/widowed/divorced       -1.303e+08
## countryIndia:maritalSeparated/widowed/divorced           3.910e+00
## countryIndonesia:maritalSeparated/widowed/divorced       1.450e+00
## countryKenya:maritalSeparated/widowed/divorced           5.357e+00
## countryNepal:maritalSeparated/widowed/divorced           4.987e+00
## countryPhilippines:maritalSeparated/widowed/divorced     6.009e+00
## countryUganda:maritalSeparated/widowed/divorced          6.369e+00
## countryZimbabwe:maritalSeparated/widowed/divorced        8.524e+00
## countryCameroon:occupationUnemployed                     3.743e+01
## countryIndia:occupationUnemployed                        3.419e+01
## countryIndonesia:occupationUnemployed                    3.666e+01
## countryKenya:occupationUnemployed                        3.622e+01
## countryNepal:occupationUnemployed                        2.786e+01
## countryPhilippines:occupationUnemployed                  3.394e+01
## countryUganda:occupationUnemployed                       7.004e+01
## countryZimbabwe:occupationUnemployed                     4.750e+01
## countryCameroon:occupationStudent                       -1.400e-01
## countryIndia:occupationStudent                           1.747e-01
## countryIndonesia:occupationStudent                      -3.204e-01
## countryKenya:occupationStudent                          -2.877e-01
## countryNepal:occupationStudent                          -3.705e-01
## countryPhilippines:occupationStudent                     4.946e-01
## countryUganda:occupationStudent                         -1.231e-02
## countryZimbabwe:occupationStudent                        1.802e-02
## countryCameroon:occupationRetiree                        2.568e+07
## countryIndia:occupationRetiree                           2.923e+01
## countryIndonesia:occupationRetiree                       1.325e+07
## countryKenya:occupationRetiree                           1.232e+01
## countryNepal:occupationRetiree                           2.834e+01
## countryPhilippines:occupationRetiree                     2.063e+01
## countryUganda:occupationRetiree                          2.191e+01
## countryZimbabwe:occupationRetiree                        7.513e+08
## countryCameroon:occupationHomemaker                      1.176e+01
## countryIndia:occupationHomemaker                         3.074e+01
## countryIndonesia:occupationHomemaker                     1.650e+01
## countryKenya:occupationHomemaker                         1.955e+01
## countryNepal:occupationHomemaker                         2.544e+01
## countryPhilippines:occupationHomemaker                   2.128e+01
## countryUganda:occupationHomemaker                        3.929e+01
## countryZimbabwe:occupationHomemaker                      1.132e+01
## countryCameroon:mobileYes feature phone                 -7.589e-01
## countryIndia:mobileYes feature phone                    -8.759e-02
## countryIndonesia:mobileYes feature phone                -9.612e-01
## countryKenya:mobileYes feature phone                    -4.236e-01
## countryNepal:mobileYes feature phone                     1.876e-01
## countryPhilippines:mobileYes feature phone               1.329e+00
## countryUganda:mobileYes feature phone                   -6.698e-02
## countryZimbabwe:mobileYes feature phone                  2.081e+00
## countryCameroon:mobileNo                                 1.047e+01
## countryIndia:mobileNo                                    1.342e+01
## countryIndonesia:mobileNo                                7.216e+00
## countryKenya:mobileNo                                    1.449e+01
## countryNepal:mobileNo                                    8.150e+00
## countryPhilippines:mobileNo                              1.777e+01
## countryUganda:mobileNo                                   1.912e+01
## countryZimbabwe:mobileNo                                 1.414e+01
## countryCameroon:internetYes                             -9.475e-02
## countryIndia:internetYes                                 3.836e-01
## countryIndonesia:internetYes                             2.621e+01
## countryKenya:internetYes                                -3.323e-01
## countryNepal:internetYes                                 7.474e-01
## countryPhilippines:internetYes                           2.280e+00
## countryUganda:internetYes                                7.642e-01
## countryZimbabwe:internetYes                             -8.608e-01
## countryCameroon:educationDiploma                         4.925e-01
## countryIndia:educationDiploma                            7.852e-02
## countryIndonesia:educationDiploma                        6.537e-01
## countryKenya:educationDiploma                           -1.231e+00
## countryNepal:educationDiploma                            7.911e-01
## countryPhilippines:educationDiploma                      5.496e-01
## countryUganda:educationDiploma                           3.598e-01
## countryZimbabwe:educationDiploma                         9.092e-01
## countryCameroon:educationSecondary school                1.167e+00
## countryIndia:educationSecondary school                   1.104e+00
## countryIndonesia:educationSecondary school               1.201e+00
## countryKenya:educationSecondary school                   6.871e-01
## countryNepal:educationSecondary school                   1.220e+00
## countryPhilippines:educationSecondary school             2.034e+00
## countryUganda:educationSecondary school                  1.211e+00
## countryZimbabwe:educationSecondary school                2.679e+00
## countryCameroon:educationPrimary school                  1.718e+01
## countryIndia:educationPrimary school                     3.393e+01
## countryIndonesia:educationPrimary school                 1.883e+01
## countryKenya:educationPrimary school                     2.092e+01
## countryNepal:educationPrimary school                     3.206e+01
## countryPhilippines:educationPrimary school               2.300e+01
## countryUganda:educationPrimary school                    1.736e+01
## countryZimbabwe:educationPrimary school                  1.934e+01
## countryCameroon:educationNo formal education             5.418e+03
## countryIndia:educationNo formal education                1.650e+01
## countryIndonesia:educationNo formal education           -3.033e+03
## countryKenya:educationNo formal education                1.136e+01
## countryNepal:educationNo formal education                1.709e+01
## countryPhilippines:educationNo formal education          1.393e+01
## countryUganda:educationNo formal education               1.241e+01
## countryZimbabwe:educationNo formal education            -1.052e+05
## 
## Intercepts:
##     Value         Std. Error    t value      
## 1|2  3.652300e+00  8.711000e-01  4.192700e+00
## 2|3  4.339500e+00  8.720000e-01  4.976600e+00
## 3|4  5.043200e+00  8.727000e-01  5.778700e+00
## 4|5  5.541900e+00  8.732000e-01  6.346800e+00
## 
## Residual Deviance: 5866.143 
## AIC: 6166.143
# compare with model 1
anova(m1, m2, test = "Chisq")
## Likelihood ratio tests of ordinal regression models
## 
## Response: papm
##                                                                                                                                                                                                                     Model
## 1                                                                                                                               country + gender + age + education + marital + occupation + live_area + mobile + internet
## 2 country + gender + age + education + marital + occupation + live_area + mobile + internet + gender:country + age:country + marital:country + occupation:country + mobile:country + internet:country + education:country
##   Resid. df Resid. Dev   Test    Df LR stat.      Pr(Chi)
## 1      2864   6159.317                                   
## 2      2744   5866.143 1 vs 2   120 293.1745 1.110223e-16
Anova(m2)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Analysis of Deviance Table (Type II tests)
## 
## Response: papm
##                    LR Chisq Df Pr(>Chisq)    
## country             1052.99  8  < 2.2e-16 ***
## gender                 0.41  1  0.5242950    
## age                    3.39  1  0.0654123 .  
## education             28.65  4  9.193e-06 ***
## marital                3.83  2  0.1473131    
## occupation             7.84  4  0.0977567 .  
## live_area              1.64  3  0.6501930    
## mobile                15.12  2  0.0005211 ***
## internet               2.95  1  0.0860819 .  
## country:gender        10.36  8  0.2405091    
## country:age           22.71  8  0.0037550 ** 
## country:marital       31.39 16  0.0120063 *  
## country:occupation    59.75 32  0.0020810 ** 
## country:mobile        41.60 16  0.0004522 ***
## country:internet      28.18  8  0.0004413 ***
## country:education     70.05 32  0.0001168 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m2 is beter than m1.

Testing the proportional odds assumption

https://peopleanalytics-regression-book.org/ord-reg.html:

dat_test1 <- dat1 %>% mutate(
  test1 = ifelse(papm == "1", 0, 1),
  test2 = ifelse(papm == "1" | papm == "2", 0, 1),
  test3 = ifelse(papm == "1" | papm == "2" | papm == "3", 0, 1),
  test4 = ifelse(papm == "1" | papm == "2" | papm == "3" | papm == "4", 0, 1)
)
table(dat_test1$papm)
## 
##    1    2    3    4    5 
## 1596  291  285  179  543
m1 <- glm(test1 ~ gender + age + education + marital + occupation + live_area + mobile + internet,
  data = dat_test1 %>% filter(country == "Bangladesh"),
  family = "binomial"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m2 <- glm(test2 ~ gender + age + education + marital + occupation + live_area + mobile + internet,
  data = dat_test1 %>% filter(country == "Bangladesh"),
  family = "binomial"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m3 <- glm(test3 ~ gender + age + education + marital + occupation + live_area + mobile + internet,
  data = dat_test1 %>% filter(country == "Bangladesh"),
  family = "binomial"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m4 <- glm(test4 ~ gender + age + education + marital + occupation + live_area + mobile + internet,
  data = dat_test1 %>% filter(country == "Bangladesh"),
  family = "binomial"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
coefficient_comparison <- data.frame(
  test1 = summary(m1)$coefficients[, "Estimate"],
  test2 = summary(m2)$coefficients[, "Estimate"],
  test3 = summary(m3)$coefficients[, "Estimate"],
  test4 = summary(m4)$coefficients[, "Estimate"],
  diff1_2 = summary(m2)$coefficients[, "Estimate"] - summary(m1)$coefficients[, "Estimate"],
  diff1_3 = summary(m3)$coefficients[, "Estimate"] - summary(m1)$coefficients[, "Estimate"],
  diff1_4 = summary(m4)$coefficients[, "Estimate"] - summary(m1)$coefficients[, "Estimate"]
)
coefficient_comparison
##                                             test1        test2        test3
## (Intercept)                           -4.77595931  -4.77595931  -6.10420463
## genderFemale                           0.38700242   0.38700242   0.09338424
## age                                    0.08512152   0.08512152   0.08291879
## educationDiploma                      -0.31591491  -0.31591491  -1.44785760
## educationSecondary school             -1.77525568  -1.77525568  -1.66613005
## educationPrimary school              -18.01603203 -18.01603203 -19.13045687
## educationNo formal education         -19.54783604 -19.54783604 -20.62539568
## maritalMarried/living with a partner  -0.65031291  -0.65031291  -0.56711481
## maritalSeparated/widowed/divorced     -2.11973287  -2.11973287  -1.11113159
## occupationUnemployed                 -18.58084035 -18.58084035 -19.58069143
## occupationStudent                      0.11277406   0.11277406  -0.67586541
## occupationRetiree                    -20.44333003 -20.44333003 -20.16806042
## occupationHomemaker                  -16.21747376 -16.21747376 -16.88829039
## live_areaPeri-urban                   -0.45811434  -0.45811434   1.00093169
## live_areaRural                         0.77632174   0.77632174   2.28880765
## mobileYes feature phone                0.05521909   0.05521909   0.02118535
## mobileNo                             -14.11045568 -14.11045568 -14.20706682
## internetYes                            0.28136377   0.28136377   0.38110277
##                                             test4 diff1_2      diff1_3
## (Intercept)                          -34.93972307       0 -1.328245323
## genderFemale                          -0.11672626       0 -0.293618176
## age                                    0.03502682       0 -0.002202736
## educationDiploma                     -23.35197545       0 -1.131942693
## educationSecondary school             -1.74605070       0  0.109125629
## educationPrimary school              -19.83231260       0 -1.114424839
## educationNo formal education         -20.91176453       0 -1.077559642
## maritalMarried/living with a partner  17.96932351       0  0.083198108
## maritalSeparated/widowed/divorced     18.74805331       0  1.008601277
## occupationUnemployed                 -16.15798504       0 -0.999851084
## occupationStudent                     -9.07947626       0 -0.788639469
## occupationRetiree                     -1.73019042       0  0.275269612
## occupationHomemaker                  -17.96936151       0 -0.670816628
## live_areaPeri-urban                   18.80811465       0  1.459046033
## live_areaRural                        20.42660607       0  1.512485918
## mobileYes feature phone               -5.92148716       0 -0.034033741
## mobileNo                              -5.33094350       0 -0.096611142
## internetYes                           -5.93206069       0  0.099739006
##                                           diff1_4
## (Intercept)                          -30.16376376
## genderFemale                          -0.50372868
## age                                   -0.05009471
## educationDiploma                     -23.03606053
## educationSecondary school              0.02920498
## educationPrimary school               -1.81628057
## educationNo formal education          -1.36392849
## maritalMarried/living with a partner  18.61963642
## maritalSeparated/widowed/divorced     20.86778618
## occupationUnemployed                   2.42285531
## occupationStudent                     -9.19225032
## occupationRetiree                     18.71313961
## occupationHomemaker                   -1.75188775
## live_areaPeri-urban                   19.26622899
## live_areaRural                        19.65028434
## mobileYes feature phone               -5.97670625
## mobileNo                               8.77951218
## internetYes                           -6.21342446

We could see there is a large difference coefficients. The assumption of proportional odds is violated. We need to use different methods. The book recommend some other methods explain more details in Agresti (2010))

  • Baseline logistic model. This model is the same as the multinomial regression model covered in the previous chapter, using the lowest ordinal value as the reference.
  • Adjacent-category logistic model. This model compares each level of the ordinal variable to the next highest level, and it is a constrained version of the baseline logistic model. The brglm2 package in R offers a function bracl() for calculating an adjacent category logistic model.
  • Continuation-ratio logistic model. This model compares each level of the ordinal variable to all lower levels. This can be modeled using binary logistic regression techniques, but new variables need to be constructed from the data set to allow this. The R package rms has a function cr.setup() which is a utility for preparing an outcome variable for a continuation ratio model.

I still fit the model to individual country to see where the issues occur. When the absolute Value of the predictor is greater than 10, I make table for that independent variable and the outcome for that country.

Calculate the confident interval

I use Wald test to obtain the CI because the likelihood ratio test doesn’t work

countryList <- c("Bangladesh", "Cameroon", "India", "Indonesia", "Kenya", "Nepal", "Philippines", "Uganda", "Zimbabwe")

fun <- function(data, countryList) {
  allData <- list()

  for (ctry in countryList) {
    m <- polr(formula = papm ~ gender + age + education + marital + occupation + live_area + mobile + internet, data = dat1 %>% filter(country == ctry), Hess = TRUE)

    ci <- confint.default(m)

    tmp <- as.data.frame(round(exp(cbind(OR = coef(m), ci)), 2)) %>%
      mutate(
        country = ctry,
        predictor = row.names(m)
      )

    allData[[ctry]] <- tmp
  }

  combinedData <- do.call(rbind, allData)

  return(combinedData)
}


tmp <- fun(dat1, countryList)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in polr(formula = papm ~ gender + age + education + marital + occupation
## + : design appears to be rank-deficient, so dropping some coefs
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in polr(formula = papm ~ gender + age + education + marital + occupation
## + : design appears to be rank-deficient, so dropping some coefs

## Warning in polr(formula = papm ~ gender + age + education + marital + occupation
## + : design appears to be rank-deficient, so dropping some coefs

Visualize the result

Clean the result

tmp1 <- tmp %>%
  mutate(independant_var = row.names(tmp)) %>%
  separate(independant_var, c("count", "var"), sep = "\\.") %>%
  mutate(var = str_replace_all(var, c(
    "gender" = "gender.",
    "age" = "age.Age",
    "education" = "education.",
    "marital" = "marital.",
    "occupation" = "occupation.",
    "live_area" = "live_area.",
    "mobile" = "mobile.",
    "internet" = "internet."
  ))) %>%
  separate(var, c("var", "level"), "\\.") %>%
  mutate(
    OR = ifelse(OR >= 1000, 0, OR),
    `2.5 %` = ifelse(`2.5 %` >= 1000, 0, `2.5 %`),
    `97.5 %` = ifelse(`97.5 %` >= 1000, 0, `97.5 %`)
  ) 
## Warning: Expected 2 pieces. Additional pieces discarded in 9 rows [6, 23, 41, 59, 77,
## 95, 112, 130, 148].

Gender

tmp2 <- tmp1 %>%
  filter(var == "gender")

p <- ggplot(data = tmp2, aes(x = level, y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1")+
  labs(title = "Reference - Male"
  )


ggplotly(p)

Age

tmp2 <- tmp1 %>%
  filter(var == "age") %>%
  mutate(level = as.factor(level))

p <- ggplot(data = tmp2, aes(x = level, y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1")+
  labs(title = "Reference - Contunious"
  )


ggplotly(p)

Education

tmp2 <- tmp1 %>%
  filter(var == "education") %>%
  mutate(level = as.factor(level))

p <- ggplot(data = tmp2, aes(x = fct_inorder(level), y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Reference - Employed", 
       x = "level"
  )


ggplotly(p)
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: `position_dodge()` requires non-overlapping x intervals

Live area

tmp2 <- tmp1 %>%
  filter(var == "live_area") %>%
  mutate(level = as.factor(level))

p <- ggplot(data = tmp2, aes(x = level, y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1")+
  labs(title = "Reference - Urban"
  )


ggplotly(p)
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: `position_dodge()` requires non-overlapping x intervals

Mobile

tmp2 <- tmp1 %>%
  filter(var == "mobile") %>%
  mutate(level = as.factor(level))

p <- ggplot(data = tmp2, aes(x = fct_inorder(level), y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Reference - Yes smart phone"
  )


ggplotly(p)
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: `position_dodge()` requires non-overlapping x intervals

Internet

tmp2 <- tmp1 %>%
  filter(var == "internet") %>%
  mutate(level = as.factor(level))

p <- ggplot(data = tmp2, aes(x = level, y = OR, colour = country)) +
  geom_line(group = 1, position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = `2.5 %`, ymax = `97.5 %`), position = position_dodge(0.3), width = 2) +
  geom_point(size = 3, position = position_dodge(0.3)) +
  scale_y_continuous(trans = "log10") +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Reference - No"
  )


ggplotly(p)
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis

We can use anova() function to obtain the P value for each independent variable using likeli-hood ratio test